On commence par charger les données, en ne gardant que les communes de plus de 10 000 habitants (i.e. nombre de personnes en RP > 10 000 en 2022) :

# Load the full panel from CSV.
data <- fread("../base_2012_2022.csv", encoding = "UTF-8")

# Commune-level population for 2022: nb_personnes_en_RP summed over all the
# rows of each commune.
data_com <- data[
  annee == 2022,
  .(pop = sum(nb_personnes_en_RP)),
  by = .(COM, annee)
]

# Codes of the communes whose 2022 total exceeds 10,000.
liste_com <- data_com[pop > 10000][["COM"]]

# Keep the rows of those communes, for every year present in the data.
df <- data[COM %in% liste_com]

On réalise l’ACP à l’aide du package FactoMineR. Plusieurs ACP sont menées successivement, sur différents sous-ensembles et formats des données.

ACP sur les données 2017

# Numeric columns of the 2017 rows only.
df_num <- df[annee == 2017, .SD, .SDcols = is.numeric]

# `annee` takes a single value on this subset, so it is removed before the PCA.
df_num <- df_num[, !"annee"]

# PCA on standardised variables (scale.unit = TRUE); built-in plots disabled.
res <- PCA(df_num, scale.unit = TRUE, graph = FALSE)

summary(res)
## 
## Call:
## PCA(X = df_num, scale.unit = TRUE, graph = FALSE) 
## 
## 
## Eigenvalues
##                        Dim.1   Dim.2   Dim.3   Dim.4   Dim.5   Dim.6   Dim.7
## Variance              13.638   3.641   2.284   1.397   1.031   0.827   0.740
## % of var.             52.454  14.006   8.785   5.374   3.967   3.180   2.848
## Cumulative % of var.  52.454  66.460  75.245  80.619  84.586  87.766  90.614
##                        Dim.8   Dim.9  Dim.10  Dim.11  Dim.12  Dim.13  Dim.14
## Variance               0.470   0.395   0.366   0.292   0.238   0.199   0.181
## % of var.              1.809   1.521   1.409   1.123   0.914   0.764   0.696
## Cumulative % of var.  92.423  93.943  95.352  96.475  97.389  98.153  98.849
##                       Dim.15  Dim.16  Dim.17  Dim.18  Dim.19  Dim.20  Dim.21
## Variance               0.121   0.097   0.061   0.015   0.006   0.000   0.000
## % of var.              0.466   0.372   0.233   0.057   0.021   0.001   0.000
## Cumulative % of var.  99.315  99.688  99.921  99.978  99.999 100.000 100.000
##                       Dim.22  Dim.23  Dim.24  Dim.25  Dim.26
## Variance               0.000   0.000   0.000   0.000   0.000
## % of var.              0.000   0.000   0.000   0.000   0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000
## 
## Individuals (the 10 first)
##                                 Dist    Dim.1    ctr   cos2    Dim.2    ctr
## 1                           |  2.739 | -2.041  0.002  0.556 | -0.342  0.000
## 2                           |  5.449 |  4.272  0.010  0.615 | -0.937  0.002
## 3                           |  6.862 |  6.031  0.020  0.773 |  0.560  0.001
## 4                           |  8.166 |  6.617  0.024  0.657 |  4.320  0.038
## 5                           |  4.888 |  3.172  0.005  0.421 | -1.595  0.005
## 6                           |  2.516 |  0.418  0.000  0.028 |  1.265  0.003
## 7                           |  2.427 |  0.570  0.000  0.055 |  1.563  0.005
## 8                           |  2.745 |  0.538  0.000  0.038 | -0.291  0.000
## 9                           |  3.267 | -2.534  0.003  0.602 |  0.955  0.002
## 10                          |  6.491 |  3.841  0.008  0.350 |  4.527  0.041
##                               cos2    Dim.3    ctr   cos2  
## 1                            0.016 | -0.831  0.002  0.092 |
## 2                            0.030 | -2.434  0.019  0.200 |
## 3                            0.007 | -2.515  0.020  0.134 |
## 4                            0.280 |  0.118  0.000  0.000 |
## 5                            0.107 | -2.035  0.013  0.173 |
## 6                            0.253 | -1.133  0.004  0.203 |
## 7                            0.415 | -0.925  0.003  0.145 |
## 8                            0.011 | -2.287  0.017  0.694 |
## 9                            0.085 | -1.022  0.003  0.098 |
## 10                           0.486 | -1.157  0.004  0.032 |
## 
## Variables (the 10 first)
##                                Dim.1    ctr   cos2    Dim.2    ctr   cos2  
## nb_menages                  |  0.975  6.967  0.950 | -0.100  0.277  0.010 |
## nb_personnes_menage         |  0.966  6.838  0.933 |  0.130  0.463  0.017 |
## nb_logements                |  0.838  5.145  0.702 | -0.124  0.419  0.015 |
## nb_RP_1_piece               |  0.458  1.535  0.209 | -0.611 10.254  0.373 |
## nb_RP_2_pieces              |  0.683  3.424  0.467 | -0.552  8.355  0.304 |
## nb_RP_3_pieces              |  0.793  4.607  0.628 | -0.287  2.265  0.082 |
## nb_RP_4_pieces              |  0.707  3.670  0.500 |  0.374  3.838  0.140 |
## nb_RP_5_piece_et_plus       |  0.429  1.348  0.184 |  0.735 14.820  0.540 |
## nb_RP_en_loc                |  0.742  4.032  0.550 | -0.617 10.440  0.380 |
## nb_RP_proprio               |  0.666  3.256  0.444 |  0.628 10.840  0.395 |
##                              Dim.3    ctr   cos2  
## nb_menages                   0.076  0.254  0.006 |
## nb_personnes_menage         -0.134  0.783  0.018 |
## nb_logements                 0.201  1.762  0.040 |
## nb_RP_1_piece                0.415  7.550  0.172 |
## nb_RP_2_pieces               0.265  3.063  0.070 |
## nb_RP_3_pieces              -0.177  1.373  0.031 |
## nb_RP_4_pieces              -0.362  5.729  0.131 |
## nb_RP_5_piece_et_plus        0.061  0.163  0.004 |
## nb_RP_en_loc                -0.149  0.967  0.022 |
## nb_RP_proprio                0.298  3.894  0.089 |
# explor(res)
# Biplot with the individuals hidden, so only the variables are drawn.
fviz_pca_biplot(res, invisible = "ind")

# Variable plot, all variables, coloured by their contribution.
fviz_pca_var(
  res,
  col.var = "contrib",
  repel = TRUE
)

# Same plot restricted to the 5 variables with the highest contribution.
fviz_pca_var(
  res,
  col.var = "contrib",
  select.var = list(contrib = 5),
  repel = TRUE
)

# Variable plot coloured by cos2 on a three-colour gradient.
fviz_pca_var(
  res,
  col.var = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE
)

# Only the variables whose cos2 exceeds 0.90, on a two-colour gradient.
fviz_pca_var(
  res,
  col.var = "cos2",
  select.var = list(cos2 = 0.90),
  gradient.cols = c("#E7B800", "#FC4E07"),
  repel = TRUE
)

ACP sur l’ensemble des données en format long

# Pooled PCA over all years: every numeric column of df is an active variable.
# NOTE(review): `annee` is not dropped here, unlike in the 2017-only run, so
# the year itself enters the PCA (27 dimensions below vs 26) - confirm intent.
df_num <- df[, .SD, .SDcols = is.numeric]

# PCA on standardised variables; built-in plots disabled.
res <- PCA(df_num, scale.unit = TRUE, graph = FALSE)
## Warning in PCA(df_num, scale.unit = TRUE, graph = FALSE): Missing values are
## imputed by the mean of the variable: you should use the imputePCA function of
## the missMDA package
summary(res)
## 
## Call:
## PCA(X = df_num, scale.unit = TRUE, graph = FALSE) 
## 
## 
## Eigenvalues
##                        Dim.1   Dim.2   Dim.3   Dim.4   Dim.5   Dim.6   Dim.7
## Variance              13.660   3.637   2.273   1.397   1.042   0.993   0.817
## % of var.             50.592  13.471   8.418   5.172   3.858   3.679   3.027
## Cumulative % of var.  50.592  64.062  72.480  77.653  81.511  85.190  88.217
##                        Dim.8   Dim.9  Dim.10  Dim.11  Dim.12  Dim.13  Dim.14
## Variance               0.725   0.475   0.396   0.369   0.286   0.244   0.201
## % of var.              2.685   1.759   1.468   1.365   1.059   0.905   0.746
## Cumulative % of var.  90.903  92.661  94.129  95.494  96.553  97.458  98.203
##                       Dim.15  Dim.16  Dim.17  Dim.18  Dim.19  Dim.20  Dim.21
## Variance               0.181   0.121   0.098   0.064   0.015   0.005   0.000
## % of var.              0.671   0.448   0.362   0.236   0.057   0.020   0.001
## Cumulative % of var.  98.875  99.322  99.684  99.920  99.977  99.996  99.998
##                       Dim.22  Dim.23  Dim.24  Dim.25  Dim.26  Dim.27
## Variance               0.000   0.000   0.000   0.000   0.000   0.000
## % of var.              0.001   0.001   0.000   0.000   0.000   0.000
## Cumulative % of var.  99.999 100.000 100.000 100.000 100.000 100.000
## 
## Individuals (the 10 first)
##                                 Dist    Dim.1    ctr   cos2    Dim.2    ctr
## 1                           |  3.296 | -2.668  0.000  0.655 |  0.423  0.000
## 2                           |  6.202 |  4.306  0.001  0.482 | -0.001  0.000
## 3                           |  5.736 |  4.447  0.001  0.601 |  0.228  0.000
## 4                           |  9.264 |  6.581  0.002  0.505 |  5.652  0.006
## 5                           |  4.108 | -3.633  0.001  0.782 |  0.381  0.000
## 6                           |  4.532 |  3.037  0.000  0.449 | -0.834  0.000
## 7                           |  2.870 |  0.453  0.000  0.025 |  1.400  0.000
## 8                           |  3.268 |  0.228  0.000  0.005 | -0.258  0.000
## 9                           |  3.568 | -2.653  0.000  0.553 |  0.627  0.000
## 10                          |  4.011 | -0.025  0.000  0.000 | -2.277  0.001
##                               cos2    Dim.3    ctr   cos2  
## 1                            0.016 | -0.657  0.000  0.040 |
## 2                            0.000 | -3.789  0.004  0.373 |
## 3                            0.002 | -2.597  0.002  0.205 |
## 4                            0.372 |  0.157  0.000  0.000 |
## 5                            0.009 | -0.377  0.000  0.008 |
## 6                            0.034 | -1.593  0.001  0.123 |
## 7                            0.238 | -1.293  0.000  0.203 |
## 8                            0.006 | -2.453  0.002  0.563 |
## 9                            0.031 | -1.288  0.000  0.130 |
## 10                           0.322 |  0.623  0.000  0.024 |
## 
## Variables (the 10 first)
##                                Dim.1    ctr   cos2    Dim.2    ctr   cos2  
## nb_menages                  |  0.974  6.952  0.950 | -0.100  0.274  0.010 |
## nb_personnes_menage         |  0.965  6.820  0.932 |  0.131  0.475  0.017 |
## nb_logements                |  0.841  5.179  0.707 | -0.123  0.414  0.015 |
## nb_RP_1_piece               |  0.459  1.546  0.211 | -0.613 10.326  0.376 |
## nb_RP_2_pieces              |  0.684  3.424  0.468 | -0.550  8.322  0.303 |
## nb_RP_3_pieces              |  0.792  4.591  0.627 | -0.285  2.229  0.081 |
## nb_RP_4_pieces              |  0.707  3.663  0.500 |  0.374  3.844  0.140 |
## nb_RP_5_piece_et_plus       |  0.434  1.379  0.188 |  0.733 14.760  0.537 |
## nb_RP_en_loc                |  0.742  4.032  0.551 | -0.616 10.430  0.379 |
## nb_RP_proprio               |  0.670  3.281  0.448 |  0.625 10.732  0.390 |
##                              Dim.3    ctr   cos2  
## nb_menages                   0.079  0.272  0.006 |
## nb_personnes_menage         -0.132  0.771  0.018 |
## nb_logements                 0.205  1.851  0.042 |
## nb_RP_1_piece                0.403  7.163  0.163 |
## nb_RP_2_pieces               0.265  3.096  0.070 |
## nb_RP_3_pieces              -0.168  1.248  0.028 |
## nb_RP_4_pieces              -0.358  5.625  0.128 |
## nb_RP_5_piece_et_plus        0.064  0.179  0.004 |
## nb_RP_en_loc                -0.147  0.955  0.022 |
## nb_RP_proprio                0.300  3.963  0.090 |
# explor(res)
# Biplot with the individuals hidden, so only the variables are drawn.
fviz_pca_biplot(res, invisible = "ind")

# Variable plot, all variables, coloured by their contribution.
fviz_pca_var(
  res,
  col.var = "contrib",
  repel = TRUE
)

# Same plot restricted to the 5 variables with the highest contribution.
fviz_pca_var(
  res,
  col.var = "contrib",
  select.var = list(contrib = 5),
  repel = TRUE
)

# Variable plot coloured by cos2 on a three-colour gradient.
fviz_pca_var(
  res,
  col.var = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE
)

# Only the variables whose cos2 exceeds 0.90, on a two-colour gradient.
fviz_pca_var(
  res,
  col.var = "cos2",
  select.var = list(cos2 = 0.90),
  gradient.cols = c("#E7B800", "#FC4E07"),
  repel = TRUE
)

# ACP sur l’ensemble des données au format large

# Variables carried into the wide-format tables (passed as `value.var` to
# dcast(), which yields one column per variable and per year).
col_num <- c(
  "nb_menages",
  "nb_personnes_menage",
  "nb_logements",
  "nb_RP_1_piece",
  "nb_RP_2_pieces",
  "nb_RP_3_pieces",
  "nb_RP_4_pieces",
  "nb_RP_5_piece_et_plus",
  "nb_RP_en_loc",
  "nb_RP_proprio",
  "nb_personnes_en_RP",
  "nb_personnes_en_RP_location",
  "nb_personnes_en_RP_proprio",
  "nb_residences_second_ou_occ",
  "nb_logements_vacants",
  "nb_RP",
  "nb_actifs",
  "nb_actifs_occ",
  "nb_chomeurs",
  "nb_agriculteurs",
  "nb_commercants",
  "nb_cadres",
  "nb_professions_inter"
)

# Reshape to wide: one row per (IRIS, COM) pair, and one column per variable
# of col_num and per year.
df_large <- dcast(df, IRIS + COM ~ annee, value.var = col_num)

# Keep the numeric columns only as active variables.
df_num <- df_large[, .SD, .SDcols = is.numeric]

# PCA on standardised variables; built-in plots disabled.
res <- PCA(df_num, scale.unit = TRUE, graph = FALSE)
## Warning in PCA(df_num, scale.unit = TRUE, graph = FALSE): Missing values are
## imputed by the mean of the variable: you should use the imputePCA function of
## the missMDA package
summary(res)
## 
## Call:
## PCA(X = df_num, scale.unit = TRUE, graph = FALSE) 
## 
## 
## Eigenvalues
##                        Dim.1   Dim.2   Dim.3   Dim.4   Dim.5   Dim.6   Dim.7
## Variance             131.107  37.336  18.712  14.663   9.069   7.143   5.431
## % of var.             51.821  14.757   7.396   5.796   3.585   2.823   2.147
## Cumulative % of var.  51.821  66.578  73.974  79.770  83.355  86.178  88.325
##                        Dim.8   Dim.9  Dim.10  Dim.11  Dim.12  Dim.13  Dim.14
## Variance               3.737   3.272   2.889   2.665   2.163   1.774   1.428
## % of var.              1.477   1.293   1.142   1.053   0.855   0.701   0.564
## Cumulative % of var.  89.802  91.095  92.237  93.290  94.145  94.846  95.411
##                       Dim.15  Dim.16  Dim.17  Dim.18  Dim.19  Dim.20  Dim.21
## Variance               1.088   0.855   0.757   0.701   0.615   0.477   0.468
## % of var.              0.430   0.338   0.299   0.277   0.243   0.188   0.185
## Cumulative % of var.  95.841  96.179  96.478  96.755  96.998  97.187  97.372
##                       Dim.22  Dim.23  Dim.24  Dim.25  Dim.26  Dim.27  Dim.28
## Variance               0.434   0.394   0.373   0.310   0.292   0.288   0.228
## % of var.              0.172   0.156   0.148   0.123   0.116   0.114   0.090
## Cumulative % of var.  97.543  97.699  97.846  97.969  98.085  98.199  98.289
##                       Dim.29  Dim.30  Dim.31  Dim.32  Dim.33  Dim.34  Dim.35
## Variance               0.209   0.204   0.190   0.170   0.165   0.160   0.148
## % of var.              0.083   0.081   0.075   0.067   0.065   0.063   0.059
## Cumulative % of var.  98.371  98.452  98.527  98.595  98.660  98.723  98.782
##                       Dim.36  Dim.37  Dim.38  Dim.39  Dim.40  Dim.41  Dim.42
## Variance               0.146   0.142   0.131   0.124   0.115   0.110   0.105
## % of var.              0.058   0.056   0.052   0.049   0.045   0.043   0.042
## Cumulative % of var.  98.839  98.896  98.948  98.996  99.042  99.085  99.127
##                       Dim.43  Dim.44  Dim.45  Dim.46  Dim.47  Dim.48  Dim.49
## Variance               0.090   0.086   0.081   0.077   0.073   0.070   0.066
## % of var.              0.036   0.034   0.032   0.031   0.029   0.028   0.026
## Cumulative % of var.  99.162  99.196  99.228  99.259  99.288  99.315  99.341
##                       Dim.50  Dim.51  Dim.52  Dim.53  Dim.54  Dim.55  Dim.56
## Variance               0.063   0.060   0.059   0.057   0.053   0.049   0.048
## % of var.              0.025   0.024   0.023   0.023   0.021   0.020   0.019
## Cumulative % of var.  99.366  99.390  99.413  99.435  99.457  99.476  99.495
##                       Dim.57  Dim.58  Dim.59  Dim.60  Dim.61  Dim.62  Dim.63
## Variance               0.046   0.044   0.042   0.041   0.038   0.037   0.036
## % of var.              0.018   0.017   0.017   0.016   0.015   0.014   0.014
## Cumulative % of var.  99.513  99.531  99.547  99.564  99.578  99.593  99.607
##                       Dim.64  Dim.65  Dim.66  Dim.67  Dim.68  Dim.69  Dim.70
## Variance               0.033   0.032   0.031   0.029   0.029   0.028   0.026
## % of var.              0.013   0.013   0.012   0.012   0.011   0.011   0.010
## Cumulative % of var.  99.620  99.633  99.645  99.657  99.668  99.679  99.689
##                       Dim.71  Dim.72  Dim.73  Dim.74  Dim.75  Dim.76  Dim.77
## Variance               0.025   0.024   0.024   0.023   0.023   0.021   0.021
## % of var.              0.010   0.010   0.009   0.009   0.009   0.008   0.008
## Cumulative % of var.  99.699  99.709  99.718  99.727  99.736  99.745  99.753
##                       Dim.78  Dim.79  Dim.80  Dim.81  Dim.82  Dim.83  Dim.84
## Variance               0.020   0.019   0.019   0.018   0.017   0.016   0.016
## % of var.              0.008   0.008   0.007   0.007   0.007   0.006   0.006
## Cumulative % of var.  99.761  99.769  99.776  99.783  99.790  99.796  99.802
##                       Dim.85  Dim.86  Dim.87  Dim.88  Dim.89  Dim.90  Dim.91
## Variance               0.015   0.015   0.015   0.014   0.014   0.013   0.013
## % of var.              0.006   0.006   0.006   0.006   0.005   0.005   0.005
## Cumulative % of var.  99.809  99.814  99.820  99.826  99.831  99.837  99.842
##                       Dim.92  Dim.93  Dim.94  Dim.95  Dim.96  Dim.97  Dim.98
## Variance               0.013   0.012   0.012   0.012   0.011   0.011   0.011
## % of var.              0.005   0.005   0.005   0.005   0.005   0.004   0.004
## Cumulative % of var.  99.847  99.852  99.857  99.861  99.866  99.870  99.875
##                       Dim.99 Dim.100 Dim.101 Dim.102 Dim.103 Dim.104 Dim.105
## Variance               0.011   0.011   0.011   0.010   0.010   0.010   0.010
## % of var.              0.004   0.004   0.004   0.004   0.004   0.004   0.004
## Cumulative % of var.  99.879  99.883  99.887  99.891  99.895  99.899  99.903
##                      Dim.106 Dim.107 Dim.108 Dim.109 Dim.110 Dim.111 Dim.112
## Variance               0.009   0.009   0.009   0.008   0.008   0.008   0.008
## % of var.              0.004   0.004   0.003   0.003   0.003   0.003   0.003
## Cumulative % of var.  99.907  99.910  99.913  99.917  99.920  99.923  99.926
##                      Dim.113 Dim.114 Dim.115 Dim.116 Dim.117 Dim.118 Dim.119
## Variance               0.008   0.007   0.007   0.007   0.007   0.006   0.006
## % of var.              0.003   0.003   0.003   0.003   0.003   0.003   0.003
## Cumulative % of var.  99.929  99.932  99.935  99.938  99.940  99.943  99.945
##                      Dim.120 Dim.121 Dim.122 Dim.123 Dim.124 Dim.125 Dim.126
## Variance               0.006   0.006   0.006   0.006   0.005   0.005   0.005
## % of var.              0.002   0.002   0.002   0.002   0.002   0.002   0.002
## Cumulative % of var.  99.948  99.950  99.952  99.955  99.957  99.959  99.961
##                      Dim.127 Dim.128 Dim.129 Dim.130 Dim.131 Dim.132 Dim.133
## Variance               0.005   0.005   0.005   0.004   0.004   0.004   0.004
## % of var.              0.002   0.002   0.002   0.002   0.002   0.002   0.002
## Cumulative % of var.  99.963  99.965  99.967  99.968  99.970  99.972  99.973
##                      Dim.134 Dim.135 Dim.136 Dim.137 Dim.138 Dim.139 Dim.140
## Variance               0.004   0.003   0.003   0.003   0.003   0.003   0.003
## % of var.              0.001   0.001   0.001   0.001   0.001   0.001   0.001
## Cumulative % of var.  99.975  99.976  99.977  99.978  99.979  99.980  99.981
##                      Dim.141 Dim.142 Dim.143 Dim.144 Dim.145 Dim.146 Dim.147
## Variance               0.003   0.002   0.002   0.002   0.002   0.002   0.002
## % of var.              0.001   0.001   0.001   0.001   0.001   0.001   0.001
## Cumulative % of var.  99.982  99.983  99.984  99.985  99.986  99.987  99.988
##                      Dim.148 Dim.149 Dim.150 Dim.151 Dim.152 Dim.153 Dim.154
## Variance               0.002   0.002   0.002   0.002   0.002   0.001   0.001
## % of var.              0.001   0.001   0.001   0.001   0.001   0.001   0.001
## Cumulative % of var.  99.989  99.989  99.990  99.991  99.991  99.992  99.992
##                      Dim.155 Dim.156 Dim.157 Dim.158 Dim.159 Dim.160 Dim.161
## Variance               0.001   0.001   0.001   0.001   0.001   0.001   0.001
## % of var.              0.001   0.000   0.000   0.000   0.000   0.000   0.000
## Cumulative % of var.  99.993  99.993  99.994  99.994  99.994  99.995  99.995
##                      Dim.162 Dim.163 Dim.164 Dim.165 Dim.166 Dim.167 Dim.168
## Variance               0.001   0.001   0.001   0.001   0.001   0.001   0.001
## % of var.              0.000   0.000   0.000   0.000   0.000   0.000   0.000
## Cumulative % of var.  99.996  99.996  99.996  99.996  99.997  99.997  99.997
##                      Dim.169 Dim.170 Dim.171 Dim.172 Dim.173 Dim.174 Dim.175
## Variance               0.001   0.001   0.001   0.001   0.000   0.000   0.000
## % of var.              0.000   0.000   0.000   0.000   0.000   0.000   0.000
## Cumulative % of var.  99.998  99.998  99.998  99.998  99.998  99.999  99.999
##                      Dim.176 Dim.177 Dim.178 Dim.179 Dim.180 Dim.181 Dim.182
## Variance               0.000   0.000   0.000   0.000   0.000   0.000   0.000
## % of var.              0.000   0.000   0.000   0.000   0.000   0.000   0.000
## Cumulative % of var.  99.999  99.999  99.999  99.999  99.999  99.999  99.999
##                      Dim.183 Dim.184 Dim.185 Dim.186 Dim.187 Dim.188 Dim.189
## Variance               0.000   0.000   0.000   0.000   0.000   0.000   0.000
## % of var.              0.000   0.000   0.000   0.000   0.000   0.000   0.000
## Cumulative % of var.  99.999 100.000 100.000 100.000 100.000 100.000 100.000
##                      Dim.190 Dim.191 Dim.192 Dim.193 Dim.194 Dim.195 Dim.196
## Variance               0.000   0.000   0.000   0.000   0.000   0.000   0.000
## % of var.              0.000   0.000   0.000   0.000   0.000   0.000   0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000 100.000 100.000
##                      Dim.197 Dim.198 Dim.199 Dim.200 Dim.201 Dim.202 Dim.203
## Variance               0.000   0.000   0.000   0.000   0.000   0.000   0.000
## % of var.              0.000   0.000   0.000   0.000   0.000   0.000   0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000 100.000 100.000
##                      Dim.204 Dim.205 Dim.206 Dim.207 Dim.208 Dim.209 Dim.210
## Variance               0.000   0.000   0.000   0.000   0.000   0.000   0.000
## % of var.              0.000   0.000   0.000   0.000   0.000   0.000   0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000 100.000 100.000
##                      Dim.211 Dim.212 Dim.213 Dim.214 Dim.215 Dim.216 Dim.217
## Variance               0.000   0.000   0.000   0.000   0.000   0.000   0.000
## % of var.              0.000   0.000   0.000   0.000   0.000   0.000   0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000 100.000 100.000
##                      Dim.218 Dim.219 Dim.220 Dim.221 Dim.222 Dim.223 Dim.224
## Variance               0.000   0.000   0.000   0.000   0.000   0.000   0.000
## % of var.              0.000   0.000   0.000   0.000   0.000   0.000   0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000 100.000 100.000
##                      Dim.225 Dim.226 Dim.227 Dim.228 Dim.229 Dim.230 Dim.231
## Variance               0.000   0.000   0.000   0.000   0.000   0.000   0.000
## % of var.              0.000   0.000   0.000   0.000   0.000   0.000   0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000 100.000 100.000
##                      Dim.232 Dim.233 Dim.234 Dim.235 Dim.236 Dim.237 Dim.238
## Variance               0.000   0.000   0.000   0.000   0.000   0.000   0.000
## % of var.              0.000   0.000   0.000   0.000   0.000   0.000   0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000 100.000 100.000
##                      Dim.239 Dim.240 Dim.241 Dim.242 Dim.243 Dim.244 Dim.245
## Variance               0.000   0.000   0.000   0.000   0.000   0.000   0.000
## % of var.              0.000   0.000   0.000   0.000   0.000   0.000   0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000 100.000 100.000
##                      Dim.246 Dim.247 Dim.248 Dim.249 Dim.250 Dim.251 Dim.252
## Variance               0.000   0.000   0.000   0.000   0.000   0.000   0.000
## % of var.              0.000   0.000   0.000   0.000   0.000   0.000   0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000 100.000 100.000
##                      Dim.253
## Variance               0.000
## % of var.              0.000
## Cumulative % of var. 100.000
## 
## Individuals (the 10 first)
##                                      Dist    Dim.1    ctr   cos2    Dim.2
## 1                                |  9.288 | -8.439  0.004  0.825 | -0.608
## 2                                | 16.685 | 14.077  0.011  0.712 | -3.154
## 3                                | 19.644 | 17.556  0.017  0.799 |  0.139
## 4                                | 27.976 | 22.158  0.027  0.627 | 15.423
## 5                                |  7.977 | -5.048  0.001  0.400 |  0.401
## 6                                | 14.372 |  9.219  0.005  0.411 | -6.236
## 7                                |  4.736 | -0.030  0.000  0.000 |  1.429
## 8                                |  5.307 |  0.666  0.000  0.016 |  3.657
## 9                                |  6.757 |  0.528  0.000  0.006 | -2.172
## 10                               |  9.742 | -9.051  0.005  0.863 |  1.474
##                                     ctr   cos2    Dim.3    ctr   cos2  
## 1                                 0.000  0.004 | -0.016  0.000  0.000 |
## 2                                 0.002  0.036 | -6.644  0.017  0.159 |
## 3                                 0.000  0.000 | -6.632  0.017  0.114 |
## 4                                 0.046  0.304 | -1.480  0.001  0.003 |
## 5                                 0.000  0.003 | -0.104  0.000  0.000 |
## 6                                 0.008  0.188 | -2.880  0.003  0.040 |
## 7                                 0.000  0.091 | -0.608  0.000  0.017 |
## 8                                 0.003  0.475 | -1.579  0.001  0.089 |
## 9                                 0.001  0.103 | -5.391  0.011  0.637 |
## 10                                0.000  0.023 | -1.389  0.001  0.020 |
## 
## Variables (the 10 first)
##                                     Dim.1    ctr   cos2    Dim.2    ctr   cos2
## nb_menages_2012                  |  0.916  0.640  0.839 | -0.129  0.045  0.017
## nb_menages_2013                  |  0.951  0.690  0.905 | -0.111  0.033  0.012
## nb_menages_2014                  |  0.938  0.671  0.880 | -0.091  0.022  0.008
## nb_menages_2015                  |  0.968  0.715  0.937 | -0.098  0.026  0.010
## nb_menages_2016                  |  0.973  0.722  0.947 | -0.093  0.023  0.009
## nb_menages_2017                  |  0.976  0.726  0.952 | -0.087  0.020  0.008
## nb_menages_2018                  |  0.977  0.727  0.954 | -0.082  0.018  0.007
## nb_menages_2019                  |  0.976  0.727  0.953 | -0.077  0.016  0.006
## nb_menages_2020                  |  0.974  0.724  0.949 | -0.068  0.012  0.005
## nb_menages_2021                  |  0.969  0.717  0.940 | -0.064  0.011  0.004
##                                     Dim.3    ctr   cos2  
## nb_menages_2012                  |  0.068  0.025  0.005 |
## nb_menages_2013                  |  0.046  0.011  0.002 |
## nb_menages_2014                  |  0.034  0.006  0.001 |
## nb_menages_2015                  |  0.030  0.005  0.001 |
## nb_menages_2016                  |  0.027  0.004  0.001 |
## nb_menages_2017                  |  0.024  0.003  0.001 |
## nb_menages_2018                  |  0.021  0.002  0.000 |
## nb_menages_2019                  |  0.019  0.002  0.000 |
## nb_menages_2020                  |  0.016  0.001  0.000 |
## nb_menages_2021                  |  0.015  0.001  0.000 |
# explor(res)
# Biplot with the individuals hidden, so only the variables are drawn.
fviz_pca_biplot(res, invisible = "ind")

# Variable plot, all variables, coloured by their contribution.
fviz_pca_var(
  res,
  col.var = "contrib",
  repel = TRUE
)

# Same plot restricted to the 5 variables with the highest contribution.
fviz_pca_var(
  res,
  col.var = "contrib",
  select.var = list(contrib = 5),
  repel = TRUE
)

# Variable plot coloured by cos2 on a three-colour gradient.
fviz_pca_var(
  res,
  col.var = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE
)

# Only the variables whose cos2 exceeds 0.95, on a two-colour gradient.
fviz_pca_var(
  res,
  col.var = "cos2",
  select.var = list(cos2 = 0.95),
  gradient.cols = c("#E7B800", "#FC4E07"),
  repel = TRUE
)

ACP en format large uniquement sur 2015, 2016 et 2017

# Restrict to the years 2015-2017, then reshape to wide: one row per
# (IRIS, COM) pair, and one column per variable of col_num and per year.
df_large <- dcast(
  df[annee %in% c(2015, 2016, 2017)],
  IRIS + COM ~ annee,
  value.var = col_num
)

# Keep the numeric columns only as active variables.
df_num <- df_large[, .SD, .SDcols = is.numeric]

# PCA on standardised variables; built-in plots disabled.
res <- PCA(df_num, scale.unit = TRUE, graph = FALSE)
## Warning in PCA(df_num, scale.unit = TRUE, graph = FALSE): Missing values are
## imputed by the mean of the variable: you should use the imputePCA function of
## the missMDA package
summary(res)
## 
## Call:
## PCA(X = df_num, scale.unit = TRUE, graph = FALSE) 
## 
## 
## Eigenvalues
##                        Dim.1   Dim.2   Dim.3   Dim.4   Dim.5   Dim.6   Dim.7
## Variance              36.762  10.563   5.311   4.098   2.755   2.168   1.591
## % of var.             53.279  15.308   7.696   5.940   3.992   3.141   2.305
## Cumulative % of var.  53.279  68.587  76.283  82.223  86.215  89.356  91.662
##                        Dim.8   Dim.9  Dim.10  Dim.11  Dim.12  Dim.13  Dim.14
## Variance               1.198   0.903   0.832   0.686   0.557   0.358   0.232
## % of var.              1.737   1.308   1.206   0.995   0.807   0.518   0.337
## Cumulative % of var.  93.399  94.707  95.913  96.907  97.715  98.233  98.570
##                       Dim.15  Dim.16  Dim.17  Dim.18  Dim.19  Dim.20  Dim.21
## Variance               0.221   0.098   0.085   0.059   0.052   0.049   0.046
## % of var.              0.321   0.142   0.124   0.086   0.075   0.070   0.067
## Cumulative % of var.  98.891  99.033  99.156  99.242  99.317  99.387  99.454
##                       Dim.22  Dim.23  Dim.24  Dim.25  Dim.26  Dim.27  Dim.28
## Variance               0.040   0.037   0.034   0.026   0.025   0.022   0.021
## % of var.              0.058   0.053   0.049   0.038   0.036   0.032   0.030
## Cumulative % of var.  99.512  99.565  99.614  99.652  99.688  99.720  99.750
##                       Dim.29  Dim.30  Dim.31  Dim.32  Dim.33  Dim.34  Dim.35
## Variance               0.018   0.018   0.017   0.015   0.015   0.013   0.012
## % of var.              0.026   0.025   0.024   0.022   0.021   0.018   0.018
## Cumulative % of var.  99.776  99.801  99.826  99.848  99.870  99.888  99.906
##                       Dim.36  Dim.37  Dim.38  Dim.39  Dim.40  Dim.41  Dim.42
## Variance               0.011   0.010   0.008   0.007   0.006   0.005   0.004
## % of var.              0.016   0.015   0.011   0.010   0.008   0.007   0.007
## Cumulative % of var.  99.921  99.936  99.947  99.957  99.966  99.973  99.979
##                       Dim.43  Dim.44  Dim.45  Dim.46  Dim.47  Dim.48  Dim.49
## Variance               0.004   0.003   0.002   0.001   0.001   0.001   0.001
## % of var.              0.005   0.005   0.002   0.002   0.002   0.001   0.001
## Cumulative % of var.  99.985  99.990  99.992  99.994  99.996  99.997  99.998
##                       Dim.50  Dim.51  Dim.52  Dim.53  Dim.54  Dim.55  Dim.56
## Variance               0.000   0.000   0.000   0.000   0.000   0.000   0.000
## % of var.              0.001   0.001   0.000   0.000   0.000   0.000   0.000
## Cumulative % of var.  99.999 100.000 100.000 100.000 100.000 100.000 100.000
##                       Dim.57  Dim.58  Dim.59  Dim.60  Dim.61  Dim.62  Dim.63
## Variance               0.000   0.000   0.000   0.000   0.000   0.000   0.000
## % of var.              0.000   0.000   0.000   0.000   0.000   0.000   0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000 100.000 100.000
##                       Dim.64  Dim.65  Dim.66  Dim.67  Dim.68  Dim.69
## Variance               0.000   0.000   0.000   0.000   0.000   0.000
## % of var.              0.000   0.000   0.000   0.000   0.000   0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000 100.000
## 
## Individuals (the 10 first)
##                                      Dist    Dim.1    ctr   cos2    Dim.2
## 1                                |  4.824 | -4.410  0.004  0.836 | -0.834
## 2                                |  8.224 |  6.920  0.010  0.708 | -2.192
## 3                                | 10.343 |  9.321  0.017  0.812 |  0.346
## 4                                | 14.271 | 11.117  0.025  0.607 |  8.272
## 5                                |  4.773 | -3.707  0.003  0.603 |  0.054
## 6                                |  7.284 |  4.815  0.005  0.437 | -3.433
## 7                                |  1.710 |  0.110  0.000  0.004 |  0.540
## 8                                |  3.058 |  0.651  0.000  0.045 |  2.192
## 9                                |  3.677 |  0.240  0.000  0.004 | -1.335
## 10                               |  4.910 | -4.403  0.004  0.804 |  1.319
##                                     ctr   cos2    Dim.3    ctr   cos2  
## 1                                 0.000  0.030 | -0.121  0.000  0.001 |
## 2                                 0.003  0.071 | -3.026  0.013  0.135 |
## 3                                 0.000  0.001 | -3.549  0.017  0.118 |
## 4                                 0.047  0.336 | -0.312  0.000  0.000 |
## 5                                 0.000  0.000 | -0.246  0.000  0.003 |
## 6                                 0.008  0.222 | -1.627  0.004  0.050 |
## 7                                 0.000  0.100 | -0.291  0.000  0.029 |
## 8                                 0.003  0.514 | -0.667  0.001  0.048 |
## 9                                 0.001  0.132 | -2.913  0.012  0.628 |
## 10                                0.001  0.072 | -0.777  0.001  0.025 |
## 
## Variables (the 10 first)
##                                     Dim.1    ctr   cos2    Dim.2    ctr   cos2
## nb_menages_2015                  |  0.974  2.580  0.948 | -0.098  0.092  0.010
## nb_menages_2016                  |  0.978  2.603  0.957 | -0.093  0.081  0.009
## nb_menages_2017                  |  0.976  2.589  0.952 | -0.088  0.074  0.008
## nb_personnes_menage_2015         |  0.952  2.468  0.907 |  0.119  0.134  0.014
## nb_personnes_menage_2016         |  0.954  2.474  0.910 |  0.121  0.139  0.015
## nb_personnes_menage_2017         |  0.949  2.451  0.901 |  0.119  0.135  0.014
## nb_logements_2015                |  0.843  1.934  0.711 | -0.111  0.117  0.012
## nb_logements_2016                |  0.849  1.963  0.722 | -0.109  0.113  0.012
## nb_logements_2017                |  0.850  1.964  0.722 | -0.109  0.112  0.012
## nb_RP_1_piece_2015               |  0.470  0.601  0.221 | -0.542  2.778  0.293
##                                     Dim.3    ctr   cos2  
## nb_menages_2015                  |  0.032  0.019  0.001 |
## nb_menages_2016                  |  0.029  0.016  0.001 |
## nb_menages_2017                  |  0.027  0.014  0.001 |
## nb_personnes_menage_2015         | -0.198  0.738  0.039 |
## nb_personnes_menage_2016         | -0.199  0.747  0.040 |
## nb_personnes_menage_2017         | -0.197  0.733  0.039 |
## nb_logements_2015                |  0.181  0.614  0.033 |
## nb_logements_2016                |  0.179  0.605  0.032 |
## nb_logements_2017                |  0.177  0.592  0.031 |
## nb_RP_1_piece_2015               |  0.443  3.704  0.197 |
# Biplot of the PCA stored in `res`; individuals are hidden so that only the
# variables are drawn.
fviz_pca_biplot(res, invisible = "ind")

# Variable plot, every variable coloured by its contribution.
fviz_pca_var(res, col.var = "contrib", repel = TRUE)

# Same plot limited by `select.var = list(contrib = 5)` (per the factoextra
# documentation: the 5 variables with the highest contribution).
fviz_pca_var(
  res,
  col.var = "contrib",
  select.var = list(contrib = 5),
  repel = TRUE
)

# Variable plot coloured by cos2 with a three-colour gradient.
fviz_pca_var(
  res,
  col.var = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE
)

# Variables filtered with `select.var = list(cos2 = 0.95)`.
# The dangling comma that used to follow the last argument has been removed:
# it passed an empty argument to fviz_pca_var().
fviz_pca_var(
  res,
  col.var = "cos2",
  select.var = list(cos2 = 0.95),
  gradient.cols = c("#E7B800", "#FC4E07"),
  repel = TRUE
)

ACP avec les différences entre 2015 et 2017

# Add, by reference, the 2015 minus 2017 difference of six indicators to the
# wide table (a positive value means the 2015 figure is the larger one).
df_large[, `:=`(
  diff_pop_2015_2017 = nb_personnes_menage_2015 - nb_personnes_menage_2017,
  diff_log_2015_2017 = nb_logements_2015 - nb_logements_2017,
  diff_1_piece_2015_2017 = nb_RP_1_piece_2015 - nb_RP_1_piece_2017,
  diff_loc_2015_2017 = nb_RP_en_loc_2015 - nb_RP_en_loc_2017,
  diff_vacants_2015_2017 =
    nb_logements_vacants_2015 - nb_logements_vacants_2017,
  diff_actifs_occ_2015_2017 = nb_actifs_occ_2015 - nb_actifs_occ_2017
)]

# PCA input: every numeric column of the wide table, new differences included.
df_num <- df_large[, .SD, .SDcols = is.numeric]

# Standardised PCA (unit variance), without the automatic plots.
res <- PCA(df_num, scale.unit = TRUE, graph = FALSE)
## Warning in PCA(df_num, scale.unit = TRUE, graph = FALSE): Missing values are
## imputed by the mean of the variable: you should use the imputePCA function of
## the missMDA package
# Print the PCA summary: eigenvalues, then the first individuals and variables.
summary(res)
## 
## Call:
## PCA(X = df_num, scale.unit = TRUE, graph = FALSE) 
## 
## 
## Eigenvalues
##                        Dim.1   Dim.2   Dim.3   Dim.4   Dim.5   Dim.6   Dim.7
## Variance              36.894  10.565   5.312   4.140   3.166   2.712   2.065
## % of var.             49.192  14.087   7.083   5.521   4.221   3.616   2.753
## Cumulative % of var.  49.192  63.279  70.362  75.883  80.104  83.720  86.473
##                        Dim.8   Dim.9  Dim.10  Dim.11  Dim.12  Dim.13  Dim.14
## Variance               1.597   1.204   1.190   1.062   0.900   0.829   0.682
## % of var.              2.129   1.605   1.586   1.416   1.200   1.105   0.909
## Cumulative % of var.  88.603  90.208  91.794  93.211  94.411  95.516  96.425
##                       Dim.15  Dim.16  Dim.17  Dim.18  Dim.19  Dim.20  Dim.21
## Variance               0.550   0.426   0.356   0.247   0.228   0.166   0.129
## % of var.              0.733   0.568   0.475   0.330   0.304   0.222   0.172
## Cumulative % of var.  97.158  97.726  98.201  98.530  98.834  99.056  99.228
##                       Dim.22  Dim.23  Dim.24  Dim.25  Dim.26  Dim.27  Dim.28
## Variance               0.097   0.065   0.054   0.047   0.031   0.028   0.025
## % of var.              0.129   0.086   0.072   0.063   0.041   0.037   0.034
## Cumulative % of var.  99.357  99.444  99.516  99.579  99.620  99.657  99.691
##                       Dim.29  Dim.30  Dim.31  Dim.32  Dim.33  Dim.34  Dim.35
## Variance               0.024   0.022   0.019   0.018   0.017   0.016   0.015
## % of var.              0.032   0.030   0.025   0.024   0.023   0.021   0.020
## Cumulative % of var.  99.723  99.753  99.778  99.802  99.825  99.846  99.866
##                       Dim.36  Dim.37  Dim.38  Dim.39  Dim.40  Dim.41  Dim.42
## Variance               0.015   0.012   0.011   0.010   0.008   0.008   0.007
## % of var.              0.020   0.015   0.014   0.014   0.011   0.010   0.009
## Cumulative % of var.  99.886  99.901  99.916  99.929  99.940  99.950  99.960
##                       Dim.43  Dim.44  Dim.45  Dim.46  Dim.47  Dim.48  Dim.49
## Variance               0.006   0.005   0.005   0.003   0.003   0.001   0.001
## % of var.              0.008   0.007   0.006   0.005   0.004   0.002   0.002
## Cumulative % of var.  99.968  99.975  99.981  99.986  99.990  99.992  99.993
##                       Dim.50  Dim.51  Dim.52  Dim.53  Dim.54  Dim.55  Dim.56
## Variance               0.001   0.001   0.001   0.001   0.001   0.000   0.000
## % of var.              0.001   0.001   0.001   0.001   0.001   0.001   0.000
## Cumulative % of var.  99.995  99.996  99.997  99.998  99.999  99.999 100.000
##                       Dim.57  Dim.58  Dim.59  Dim.60  Dim.61  Dim.62  Dim.63
## Variance               0.000   0.000   0.000   0.000   0.000   0.000   0.000
## % of var.              0.000   0.000   0.000   0.000   0.000   0.000   0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000 100.000 100.000
##                       Dim.64  Dim.65  Dim.66  Dim.67  Dim.68  Dim.69  Dim.70
## Variance               0.000   0.000   0.000   0.000   0.000   0.000   0.000
## % of var.              0.000   0.000   0.000   0.000   0.000   0.000   0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000 100.000 100.000
##                       Dim.71  Dim.72  Dim.73  Dim.74  Dim.75
## Variance               0.000   0.000   0.000   0.000   0.000
## % of var.              0.000   0.000   0.000   0.000   0.000
## Cumulative % of var. 100.000 100.000 100.000 100.000 100.000
## 
## Individuals (the 10 first)
##                                      Dist    Dim.1    ctr   cos2    Dim.2
## 1                                |  5.572 | -4.312  0.004  0.599 | -0.801
## 2                                |  8.662 |  6.931  0.010  0.640 | -2.227
## 3                                | 10.482 |  9.385  0.018  0.802 |  0.345
## 4                                | 14.461 | 11.163  0.025  0.596 |  8.268
## 5                                |  4.773 | -3.698  0.003  0.600 |  0.060
## 6                                |  7.339 |  4.839  0.005  0.435 | -3.428
## 7                                |  1.710 |  0.109  0.000  0.004 |  0.539
## 8                                |  3.878 |  0.601  0.000  0.024 |  2.160
## 9                                |  3.972 |  0.288  0.000  0.005 | -1.327
## 10                               |  5.082 | -4.429  0.004  0.760 |  1.306
##                                     ctr   cos2    Dim.3    ctr   cos2  
## 1                                 0.000  0.021 | -0.100  0.000  0.000 |
## 2                                 0.003  0.066 | -3.044  0.013  0.123 |
## 3                                 0.000  0.001 | -3.558  0.017  0.115 |
## 4                                 0.047  0.327 | -0.329  0.000  0.001 |
## 5                                 0.000  0.000 | -0.244  0.000  0.003 |
## 6                                 0.008  0.218 | -1.633  0.004  0.050 |
## 7                                 0.000  0.099 | -0.292  0.000  0.029 |
## 8                                 0.003  0.310 | -0.693  0.001  0.032 |
## 9                                 0.001  0.112 | -2.906  0.012  0.536 |
## 10                                0.001  0.066 | -0.780  0.001  0.024 |
## 
## Variables (the 10 first)
##                                     Dim.1    ctr   cos2    Dim.2    ctr   cos2
## nb_menages_2015                  |  0.972  2.563  0.946 | -0.100  0.094  0.010
## nb_menages_2016                  |  0.978  2.591  0.956 | -0.093  0.083  0.009
## nb_menages_2017                  |  0.976  2.583  0.953 | -0.089  0.074  0.008
## nb_personnes_menage_2015         |  0.951  2.453  0.905 |  0.118  0.131  0.014
## nb_personnes_menage_2016         |  0.954  2.466  0.910 |  0.121  0.137  0.015
## nb_personnes_menage_2017         |  0.951  2.449  0.904 |  0.119  0.135  0.014
## nb_logements_2015                |  0.842  1.921  0.709 | -0.112  0.119  0.013
## nb_logements_2016                |  0.849  1.953  0.721 | -0.110  0.115  0.012
## nb_logements_2017                |  0.850  1.958  0.722 | -0.109  0.112  0.012
## nb_RP_1_piece_2015               |  0.469  0.596  0.220 | -0.542  2.780  0.294
##                                     Dim.3    ctr   cos2  
## nb_menages_2015                  |  0.031  0.018  0.001 |
## nb_menages_2016                  |  0.029  0.016  0.001 |
## nb_menages_2017                  |  0.027  0.014  0.001 |
## nb_personnes_menage_2015         | -0.198  0.741  0.039 |
## nb_personnes_menage_2016         | -0.199  0.747  0.040 |
## nb_personnes_menage_2017         | -0.197  0.730  0.039 |
## nb_logements_2015                |  0.180  0.611  0.032 |
## nb_logements_2016                |  0.179  0.603  0.032 |
## nb_logements_2017                |  0.177  0.592  0.031 |
## nb_RP_1_piece_2015               |  0.443  3.699  0.196 |
# Biplot of the PCA stored in `res`; individuals are hidden so that only the
# variables are drawn.
fviz_pca_biplot(res, invisible = "ind")

# Variable plot, every variable coloured by its contribution.
fviz_pca_var(res, col.var = "contrib", repel = TRUE)

# Same plot limited by `select.var = list(contrib = 5)` (per the factoextra
# documentation: the 5 variables with the highest contribution).
fviz_pca_var(
  res,
  col.var = "contrib",
  select.var = list(contrib = 5),
  repel = TRUE
)

# Variable plot coloured by cos2 with a three-colour gradient.
fviz_pca_var(
  res,
  col.var = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE
)

# Variables filtered with `select.var = list(cos2 = 0.95)`.
# The dangling comma that used to follow the last argument has been removed:
# it passed an empty argument to fviz_pca_var().
fviz_pca_var(
  res,
  col.var = "cos2",
  select.var = list(cos2 = 0.95),
  gradient.cols = c("#E7B800", "#FC4E07"),
  repel = TRUE
)

Clustering sur les nouvelles coordonnées ACP à l’IRIS

# All rows of `df` for year 2017, with every column kept.
df_num_com <- df[annee == 2017]

# PCA input: the numeric columns only, without `annee` (constant after the
# filter above).
df_num <- df_num_com[, .SD, .SDcols = is.numeric][, !"annee"]

# Standardised PCA (unit variance), without the automatic plots.
res <- PCA(df_num, scale.unit = TRUE, graph = FALSE)

K means avec 20 clusters

# Fixed seed so that the random k-means starts are reproducible.
set.seed(123)

# Coordinates of the individuals on the principal components.
ind_coords <- res$ind$coord

# Keep the first two axes only.
ind_coords_sub <- ind_coords[, 1:2]

# k-means with 20 clusters and 25 random starts. `iter.max` is raised from its
# default of 10: with the default, runs stopped before converging
# ("pas de convergence en 10 itérations").
km <- kmeans(ind_coords_sub, centers = 20, nstart = 25, iter.max = 100)
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
# Attach the cluster label of each row to a copy of the 2017 table
# (`df_num_com` itself is left untouched).
df_cluster <- copy(df_num_com)
df_cluster[, cluster := km$cluster]

# Cluster plot on the two retained axes, with convex hulls.
# The "jco" palette only provides 10 colours, so with more clusters most points
# were dropped from the plot; one colour per cluster is generated instead.
fviz_cluster(km, data = ind_coords_sub,
             ellipse.type = "convex",
             geom = "point",
             palette = hcl.colors(nrow(km$centers), "Dark 3"),
             repel = TRUE)
## Warning: This manual palette can handle a maximum of 10 values. You have supplied 20
## This manual palette can handle a maximum of 10 values. You have supplied 20
## Warning: Removed 7806 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_point()`).

# K-means with 100 clusters.
# Fixed seed so that the random k-means starts are reproducible.
set.seed(123)

# Coordinates of the individuals on the principal components.
ind_coords <- res$ind$coord

# Keep the first two axes only.
ind_coords_sub <- ind_coords[, 1:2]

# 25 random starts. `iter.max` is raised from its default of 10: with the
# default, runs stopped before converging
# ("pas de convergence en 10 itérations").
km <- kmeans(ind_coords_sub, centers = 100, nstart = 25, iter.max = 100)
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
# Attach the cluster label of each row to a copy of the 2017 table
# (`df_num_com` itself is left untouched).
df_cluster <- copy(df_num_com)
df_cluster[, cluster := km$cluster]

# Cluster plot on the two retained axes, with convex hulls.
# - The "jco" palette only provides 10 colours, so most points were dropped
#   from the plot; one colour per cluster is generated instead.
# - A single point shape is used: one shape per cluster produced
#   "unimplemented pch value" warnings for values above 25.
fviz_cluster(km, data = ind_coords_sub,
             ellipse.type = "convex",
             geom = "point",
             shape = 19,
             palette = hcl.colors(nrow(km$centers), "Dark 3"),
             repel = TRUE)
## Warning: This manual palette can handle a maximum of 10 values. You have supplied 100
## This manual palette can handle a maximum of 10 values. You have supplied 100
## Warning: Removed 12164 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 90 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '26'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '26'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '27'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '27'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '28'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '28'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '29'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '29'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '30'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '30'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '31'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '31'

Même avec 100 clusters, on retrouve les 100 clusters à Lille ou dans le 19e arrondissement de Paris.

ACP et clustering au niveau des villes

# Aggregate to the commune x year level by summing the numeric columns.
# The grouping keys are excluded from the summed columns explicitly: `annee` is
# numeric, so `.SDcols = is.numeric` also selected it and summed it into a
# second, meaningless `annee` column.
cols_a_sommer <- setdiff(
  names(df)[vapply(df, is.numeric, logical(1))],
  c("COM", "annee")
)
data_com <- df[, lapply(.SD, sum), by = c("COM", "annee"),
               .SDcols = cols_a_sommer]

ACP

# Commune-level rows for year 2017, with every column kept.
df_com_2017 <- data_com[annee == 2017]

# PCA input: the numeric columns only, without `annee`.
df_num <- df_com_2017[, .SD, .SDcols = is.numeric][, !"annee"]

# Standardised PCA (unit variance), without the automatic plots.
res <- PCA(df_num, scale.unit = TRUE, graph = FALSE)

# Variable plot, every variable coloured by its contribution.
fviz_pca_var(res, col.var = "contrib", repel = TRUE)

# Same plot limited by `select.var = list(contrib = 5)`.
fviz_pca_var(
  res,
  select.var = list(contrib = 5),
  col.var = "contrib",
  repel = TRUE
)

K means avec 10 clusters

# Fixed seed so that the random k-means starts are reproducible.
set.seed(123)

# Coordinates of the communes on the principal components, then the first two
# axes only.
ind_coords <- res$ind$coord
ind_coords_sub <- ind_coords[, 1:2]

# k-means with 10 clusters and 25 random starts.
km <- kmeans(ind_coords_sub, centers = 10, nstart = 25)

# Copy of the 2017 commune table with the cluster label of each row.
df_cluster <- copy(df_com_2017)[, cluster := km$cluster]

# Cluster plot on the two retained axes, with convex hulls.
fviz_cluster(
  km,
  data = ind_coords_sub,
  geom = "point",
  ellipse.type = "convex",
  repel = TRUE,
  palette = "jco"
)

# Flag the communes whose code starts with "751" (1 = flagged, 0 = not), then
# list the cluster assigned to each flagged commune.
df_cluster[, Paris := as.numeric(startsWith(COM, "751"))]
print(df_cluster[Paris == 1, .(COM, cluster)])
##        COM cluster
##     <char>   <int>
##  1:  75101       7
##  2:  75102       7
##  3:  75103       1
##  4:  75104       1
##  5:  75105       6
##  6:  75106       1
##  7:  75107       6
##  8:  75108       1
##  9:  75109       6
## 10:  75110       8
## 11:  75111       2
## 12:  75112      10
## 13:  75113       2
## 14:  75114      10
## 15:  75115       2
## 16:  75116       2
## 17:  75117       2
## 18:  75118       2
## 19:  75119       2
## 20:  75120       2
##        COM cluster
# Number of distinct clusters among the flagged communes.
print(df_cluster[Paris == 1, uniqueN(cluster)])
## [1] 6

K means avec 30 clusters

# Explicit seed, as in the other k-means sections, so that this step gives the
# same result when it is run on its own.
set.seed(123)

# k-means with 30 clusters and 25 random starts. `iter.max` is raised from its
# default of 10: with the default, runs stopped before converging
# ("pas de convergence en 10 itérations").
km <- kmeans(ind_coords_sub, centers = 30, nstart = 25, iter.max = 100)
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
## Warning: pas de convergence en 10 itérations
# Attach the cluster label of each row to a copy of the 2017 commune table
# (`df_com_2017` itself is left untouched).
df_cluster <- copy(df_com_2017)
df_cluster[, cluster := km$cluster]

# Cluster plot on the two retained axes, with convex hulls.
# - The "jco" palette only provides 10 colours, so most points were dropped
#   from the plot; one colour per cluster is generated instead.
# - A single point shape is used: one shape per cluster produced
#   "unimplemented pch value" warnings for values above 25.
fviz_cluster(km, data = ind_coords_sub,
             ellipse.type = "convex",
             geom = "point",
             shape = 19,
             palette = hcl.colors(nrow(km$centers), "Dark 3"),
             repel = TRUE)
## Warning: This manual palette can handle a maximum of 10 values. You have
## supplied 30
## Warning: This manual palette can handle a maximum of 10 values. You have
## supplied 30
## Warning: Removed 711 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '26'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '26'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '27'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '27'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '28'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '28'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '29'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '29'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '30'
## Warning in grid.Call.graphics(C_points, x$x, x$y, x$pch, x$size): unimplemented
## pch value '30'

# Flag the communes whose code starts with "751" (1 = flagged, 0 = not), then
# list the cluster assigned to each flagged commune.
df_cluster[, Paris := as.numeric(startsWith(COM, "751"))]
print(df_cluster[Paris == 1, .(COM, cluster)])
##        COM cluster
##     <char>   <int>
##  1:  75101       4
##  2:  75102       8
##  3:  75103      10
##  4:  75104       8
##  5:  75105       6
##  6:  75106      10
##  7:  75107       6
##  8:  75108      10
##  9:  75109       6
## 10:  75110      20
## 11:  75111      12
## 12:  75112       2
## 13:  75113      12
## 14:  75114       2
## 15:  75115      15
## 16:  75116      12
## 17:  75117      12
## 18:  75118      12
## 19:  75119      12
## 20:  75120      12
##        COM cluster
# Number of distinct clusters among the flagged communes.
print(df_cluster[Paris == 1, uniqueN(cluster)])
## [1] 8